if (!require("BiocManager"))
    install.packages("BiocManager")
## Loading required package: BiocManager
## Warning: package 'BiocManager' was built under R version 4.4.3
## Bioconductor version '3.20' is out-of-date; the current release version '3.21'
##   is available with R version '4.5'; see https://bioconductor.org/install
BiocManager::install("maftools")
## Bioconductor version 3.20 (BiocManager 1.30.25), R 4.4.1 (2024-06-14 ucrt)
## Warning: package(s) not installed when version(s) same as or greater than current; use
##   `force = TRUE` to re-install: 'maftools'
## Installation paths not writeable, unable to update packages
##   path: C:/Program Files/R/R-4.4.1/library
##   packages:
##     boot, class, cluster, foreign, KernSmooth, lattice, MASS, Matrix, mgcv,
##     nlme, nnet, rpart, spatial, survival
## Old packages: 'BiocManager', 'BiocParallel', 'cli', 'data.table', 'Deriv',
##   'evaluate', 'ggforce', 'ggnewscale', 'pheatmap', 'rlang', 'RPostgreSQL',
##   'RSQLite', 'tibble', 'xfun'
library(maftools)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.3
library(pheatmap)
## Warning: package 'pheatmap' was built under R version 4.4.3
anno_df <- annovarToMaf(
  annovar  = "combined_trunc_missense_families.hg19_multianno.txt",
  Center   = "CSI-NUS",
  refBuild = "hg19",
  tsbCol   = "Samples",
  table    = "refGene",
  sep      = "\t"
)
## -Reading annovar files
## --Extracting tx, exon, txchange and aa-change
## -Adding Variant_Type
## Finished in 1.590s elapsed (1.410s cpu)
families_list <- list(
  "18" = c("HPC18", "HPC109", "HPC505"),
  "25" = c("HPC8", "HPC25"),
  "29" = c("HPC29", "HPC77", "HPC84"),
  "32" = c("HPC31", "HPC32"),
  "33" = c("HPC33", "HPC39", "HPC56"),
  "52" = c("HPC52", "HPC417"),
  "57" = c("HPC57", "HPC79", "HPC80"),
  "62" = c("HPC21", "HPC62"),
  "67" = c("HPC67", "HPC110"),
  "102" = c("HPC102", "HPC107"),
  "112" = c("HPC112", "HPC124"),
  "120" = c("HPC120", "HPC397", "HPC488"),
  "123" = c("HPC123", "HPC206", "HPC495"),
  "136" = c("HPC136", "HPC502"),
  "164" = c("HPC164", "HPC486"),
  "172" = c("HPC172", "HPC489"),
  "176" = c("HPC176", "HPC212"),
  "181" = c("HPC181", "HPC525"),
  "192" = c("HPC192", "HPC209"),
  "199" = c("HPC199", "HPC484"),
  "201" = c("HPC201", "HPC520"),
  "204" = c("HPC204", "HPC503"),
  "213" = c("HPC213", "HPC509"),
  "214" = c("HPC214", "HPC261"),
  "220" = c("HPC220", "HPC528"),
  "229" = c("HPC229", "HPC401"),
  "232" = c("HPC232", "HPC529"),
  "234" = c("HPC234", "HPC518"),
  "241" = c("HPC241", "HPC491"),
  "258" = c("HPC128", "HPC258"),
  "259" = c("HPC259", "HPC521"),
  "264" = c("HPC210", "HPC264"),
  "267" = c("HPC267", "HPC514"),
  "282" = c("HPC282", "HPC511"),
  "304" = c("HPC304", "HPC459"),
  "325" = c("HPC193", "HPC325"),
  "328" = c("HPC328", "HPC513"),
  "329" = c("HPC329", "HPC506"),
  "331" = c("HPC331", "HPC482"),
  "387" = c("HPC387", "HPC516"),
  "420" = c("HPC420", "HPC507"),
  "460" = c("HPC460", "HPC522"),
  "470" = c("HPC470", "HPC512"),
  "510" = c("HPC114", "HPC510"),
  "524" = c("HPC524", "HPC527")
)
filtered_anno_df <- anno_df %>%
  as_tibble() %>%
  mutate(
    AF_nfe_num             = suppressWarnings(as.numeric(AF_nfe)),
    ExAC_nontcga_NFE_num   = suppressWarnings(as.numeric(ExAC_nontcga_NFE)),
    AF_popmax_num          = suppressWarnings(as.numeric(AF_popmax))
  ) %>%
  filter(
    AF_nfe == "." | AF_nfe_num <= 0.01,
    Family != ".",
    Variant_Classification != "Unknown",
    Variant_Classification != "Translation_Start_Site",
    !CLNSIG %in% c("Benign", "Likely_benign", "Benign/Likely_benign"),
    ExAC_nontcga_NFE == "." | ExAC_nontcga_NFE_num <= 0.01,
    AF_popmax == "." | AF_popmax_num < 0.01
  ) %>%
  dplyr::select(-AF_nfe_num, -ExAC_nontcga_NFE_num, -AF_popmax_num)


nrow(filtered_anno_df)
## [1] 552
#separar por Sample:
expanded_s_anno_df <- filtered_anno_df %>%
  separate_rows(Tumor_Sample_Barcode, sep = ",") %>% 
  mutate(Tumor_Sample_Barcode = trimws(Tumor_Sample_Barcode))
nrow(expanded_s_anno_df)
## [1] 2139
library(data.table)
## Warning: package 'data.table' was built under R version 4.4.3
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(maftools)

maf_object <- read.maf(
  maf      = expanded_s_anno_df,
  isTCGA   = FALSE
)
## -Validating
## -Summarizing
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.140s elapsed (0.070s cpu)
# cria sample2fam
sample2fam <- do.call(rbind, lapply(names(families_list), function(fam) {
  data.frame(
    Tumor_Sample_Barcode = families_list[[fam]],
    Family               = fam,
    stringsAsFactors     = FALSE
  )
}))
sample2fam <- unique(sample2fam)  # remove duplicados

# extrai clinicalData e faz merge
clin <- getClinicalData(maf_object)
clin2 <- merge(
  clin,
  sample2fam,
  by                = "Tumor_Sample_Barcode",
  all.x             = TRUE,  # mantém todas amostras do MAF
  sort              = FALSE
)

# atribui de volta ao slot clínico
maf_object@clinical.data <- clin2
oncoplot(
  maf                     = maf_object,
  clinicalFeatures        = "Family",
  sortByAnnotation        = TRUE,
  showTumorSampleBarcodes = FALSE,
  top                     = 15
)

plotmafSummary(maf = maf_object, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

mafbarplot(maf_object)

getSampleSummary(maf_object)
##     Tumor_Sample_Barcode Frame_Shift_Del Frame_Shift_Ins In_Frame_Del
##                   <fctr>           <int>           <int>        <int>
##  1:               HPC124               7               3            5
##  2:               HPC112               7               2            5
##  3:                HPC79               2               5           10
##  4:               HPC109               4               4            5
##  5:               HPC495               5               1            7
##  6:               HPC120               5               4            4
##  7:               HPC201               4               1            3
##  8:               HPC397               6               2            6
##  9:               HPC164               4               1            6
## 10:               HPC520               2               1            2
## 11:                HPC84               3               3            2
## 12:               HPC486               2               0            5
## 13:               HPC102               5               5            5
## 14:               HPC206               3               1            4
## 15:                HPC80               0               3            8
## 16:               HPC420               0               4            4
## 17:               HPC488               2               5            4
## 18:               HPC507               0               4            4
## 19:                HPC18               5               3            3
## 20:                HPC39               3               3            4
## 21:               HPC128               5               1            3
## 22:               HPC232               4               3            3
## 23:                HPC25               5               2            4
## 24:               HPC264               2               2            3
## 25:                HPC77               2               3            3
## 26:               HPC229               3               2            5
## 27:               HPC241               2               3            7
## 28:               HPC258               4               1            5
## 29:               HPC491               2               2            7
## 30:               HPC505               5               1            1
## 31:               HPC107               3               5            3
## 32:               HPC123               1               0            8
## 33:               HPC210               2               1            2
## 34:               HPC234               5               2            4
## 35:               HPC259               2               1            1
## 36:               HPC482               1               1            3
## 37:               HPC521               4               1            2
## 38:               HPC529               4               4            4
## 39:               HPC114               5               1            2
## 40:               HPC204               5               1            2
## 41:               HPC261               3               0            3
## 42:               HPC331               2               1            2
## 43:               HPC401               1               2            6
## 44:               HPC509               6               5            4
## 45:               HPC528               4               3            2
## 46:                HPC29               1               3            4
## 47:               HPC503               5               1            2
## 48:               HPC510               6               2            1
## 49:               HPC192               2               1            4
## 50:               HPC209               3               0            5
## 51:                HPC32               2               1            3
## 52:               HPC459               2               2            4
## 53:               HPC522               2               2            2
## 54:               HPC212               4               3            2
## 55:               HPC220               3               2            4
## 56:               HPC329               5               2            1
## 57:                HPC33               2               1            3
## 58:                HPC56               2               1            3
## 59:                HPC57               1               4            4
## 60:               HPC214               1               1            3
## 61:               HPC304               1               2            2
## 62:                 HPC8               3               2            4
## 63:               HPC176               5               1            1
## 64:                HPC21               2               1            4
## 65:                HPC31               1               2            2
## 66:               HPC460               2               3            4
## 67:               HPC506               2               2            1
## 68:               HPC518               4               1            4
## 69:               HPC193               0               4            4
## 70:               HPC213               4               4            1
## 71:               HPC387               4               1            3
## 72:               HPC524               4               2            1
## 73:                HPC62               1               5            2
## 74:               HPC267               2               3            4
## 75:               HPC325               0               2            4
## 76:               HPC514               2               1            2
## 77:               HPC527               3               0            2
## 78:               HPC172               1               0            2
## 79:               HPC417               2               2            1
## 80:               HPC489               1               0            0
## 81:                HPC52               3               2            0
## 82:               HPC110               2               1            3
## 83:               HPC181               1               3            3
## 84:               HPC470               0               2            3
## 85:               HPC512               1               2            1
## 86:               HPC282               1               1            3
## 87:               HPC511               1               1            2
## 88:                HPC67               2               0            1
## 89:               HPC136               0               1            2
## 90:               HPC516               3               1            2
## 91:               HPC502               0               0            3
## 92:               HPC484               0               2            1
## 93:               HPC513               1               1            2
## 94:               HPC525               1               0            1
## 95:               HPC328               0               1            1
## 96:               HPC199               1               0            1
##     Tumor_Sample_Barcode Frame_Shift_Del Frame_Shift_Ins In_Frame_Del
##     In_Frame_Ins Missense_Mutation Nonsense_Mutation Nonstop_Mutation total
##            <int>             <int>             <int>            <int> <num>
##  1:            4                12                 7                0    38
##  2:            4                12                 6                0    36
##  3:            4                 8                 7                0    36
##  4:            2                14                 5                0    34
##  5:            2                15                 4                0    34
##  6:            6                10                 3                1    33
##  7:            6                10                 8                1    33
##  8:            4                10                 4                1    33
##  9:            5                 7                 9                0    32
## 10:            8                 8                10                1    32
## 11:            5                 8                11                0    32
## 12:            5                11                 8                0    31
## 13:            5                 6                 4                0    30
## 14:            2                16                 4                0    30
## 15:            5                 7                 7                0    30
## 16:            3                11                 7                0    29
## 17:            5                 8                 4                1    29
## 18:            4                 9                 8                0    29
## 19:            2                11                 4                0    28
## 20:            5                 8                 5                0    28
## 21:            3                 7                 8                0    27
## 22:            4                 5                 8                0    27
## 23:            6                 5                 5                0    27
## 24:            4                 8                 8                0    27
## 25:            3                 8                 8                0    27
## 26:            5                 5                 6                0    26
## 27:            2                 5                 7                0    26
## 28:            2                 7                 7                0    26
## 29:            2                 6                 7                0    26
## 30:            2                 9                 8                0    26
## 31:            4                 6                 4                0    25
## 32:            2                 9                 5                0    25
## 33:            3                 9                 8                0    25
## 34:            4                 3                 7                0    25
## 35:            8                 5                 8                0    25
## 36:            3                 9                 8                0    25
## 37:            7                 6                 5                0    25
## 38:            2                 5                 6                0    25
## 39:            5                 7                 4                0    24
## 40:            3                10                 3                0    24
## 41:            9                 4                 4                1    24
## 42:            2                10                 7                0    24
## 43:            3                 7                 5                0    24
## 44:            3                 3                 3                0    24
## 45:            5                 5                 5                0    24
## 46:            1                 6                 8                0    23
## 47:            2                 9                 4                0    23
## 48:            4                 5                 5                0    23
## 49:            1                 6                 8                0    22
## 50:            2                 6                 6                0    22
## 51:            2                 9                 5                0    22
## 52:            2                 8                 4                0    22
## 53:            2                 9                 5                0    22
## 54:            3                 5                 4                0    21
## 55:            2                 5                 5                0    21
## 56:            2                 5                 6                0    21
## 57:            5                 7                 3                0    21
## 58:            4                 6                 5                0    21
## 59:            5                 4                 3                0    21
## 60:            5                 4                 5                1    20
## 61:            3                 7                 5                0    20
## 62:            2                 5                 4                0    20
## 63:            2                 8                 2                0    19
## 64:            2                 5                 5                0    19
## 65:            4                 7                 3                0    19
## 66:            1                 6                 3                0    19
## 67:            1                 6                 7                0    19
## 68:            3                 2                 5                0    19
## 69:            3                 4                 3                0    18
## 70:            3                 3                 3                0    18
## 71:            5                 1                 4                0    18
## 72:            5                 3                 3                0    18
## 73:            2                 4                 4                0    18
## 74:            0                 2                 6                0    17
## 75:            4                 5                 2                0    17
## 76:            4                 3                 5                0    17
## 77:            6                 5                 1                0    17
## 78:            2                 5                 6                0    16
## 79:            2                 3                 6                0    16
## 80:            5                 5                 5                0    16
## 81:            2                 3                 6                0    16
## 82:            4                 2                 3                0    15
## 83:            4                 2                 2                0    15
## 84:            2                 2                 6                0    15
## 85:            3                 2                 6                0    15
## 86:            2                 2                 3                0    12
## 87:            0                 3                 5                0    12
## 88:            4                 3                 2                0    12
## 89:            3                 1                 4                0    11
## 90:            3                 0                 2                0    11
## 91:            3                 2                 2                0    10
## 92:            4                 0                 2                0     9
## 93:            2                 3                 0                0     9
## 94:            3                 1                 3                0     9
## 95:            4                 0                 1                0     7
## 96:            3                 1                 0                0     6
##     In_Frame_Ins Missense_Mutation Nonsense_Mutation Nonstop_Mutation total
maf.titv = titv(maf = maf_object, plot = FALSE, useSyn = TRUE)
#plot titv summary
plotTiTv(res = maf.titv)

tbl <- table(expanded_s_anno_df$Tumor_Sample_Barcode)

# 2) Ordena desc e vê as top 10
top10 <- sort(tbl, decreasing=TRUE)[1:10]
print(top10)
## 
## HPC124 HPC112  HPC79 HPC109 HPC495 HPC120 HPC201 HPC397 HPC164 HPC520 
##     38     36     36     34     34     33     33     33     32     32
gene_summary <- getGeneSummary(maf_object)
top_genes <- gene_summary$Hugo_Symbol[1:10]
print(top_genes)
##  [1] "PABPC3"   "ASPN"     "ATN1"     "NOTCH4"   "HLA-DRB5" "MAGI1"   
##  [7] "NCF1"     "EPHB6"    "CHST15"   "PTP4A1"
lollipopPlot(maf = maf_object,
             gene = 'PABPC3',
             AACol='aaChange',
             showMutationRate = TRUE)

lollipopPlot(maf = maf_object,
             gene = 'ASPN',
             AACol='aaChange',
             showMutationRate = TRUE)
## 2 transcripts available. Use arguments refSeqID or proteinID to manually specify tx name.
##      HGNC    refseq.ID   protein.ID aa.length
##    <char>       <char>       <char>     <num>
## 1:   ASPN NM_001193335 NP_001180264       242
## 2:   ASPN    NM_017680    NP_060150       379
## Using longer transcript NM_017680 for now.

# Top 10 amostras por número de variantes
top10 <- sort(tbl, decreasing = TRUE)[1:10]

#  monta o data.frame
top10_df <- data.frame(
  Sample = names(top10),
  Count  = as.integer(top10),
  stringsAsFactors = FALSE
)

# Reordena os níveis de Sample pelo Count
top10_df$Sample <- factor(
  top10_df$Sample,
  levels = top10_df$Sample[order(top10_df$Count)]
)

#  gráfico com ggplot2

ggplot(top10_df, aes(x = Sample, y = Count, fill = Sample)) +
  geom_col(width = 0.7, show.legend = FALSE) +
  geom_text(
    aes(label = Count),    
    vjust = -0.5,
    size  = 2.5,
    color = "black"
  ) +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Top 10 Amostras Mais Mutadas",
    x     = "Amostra",
    y     = "Número de Mutações"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title  = element_text(face = "bold", hjust = 0.5)
  )

rainfallPlot(
  maf = subsetMaf(maf = maf_object, tsb = "HPC124"),
  detectChangePoints = TRUE,
  pointSize = 0.8
)
## -Processing clinical data
## Processing HPC124..
## No changepoints detected!

maf.sig = oncodrive(maf = maf_object, AACol='aaChange', minMut = 5, pvalMethod = 'zscore')
## Warning in oncodrive(maf = maf_object, AACol = "aaChange", minMut = 5,
## pvalMethod = "zscore"): Oncodrive has been superseeded by OncodriveCLUSTL. See
## http://bg.upf.edu/group/projects/oncodrive-clust.php
## No syn mutations found! Skipping background estimation. Using predefined values. (Mean = 0.279; SD = 0.13)
## Estimating cluster scores from non-syn variants..
##   |                                                                              |                                                                      |   0%  |                                                                              |==                                                                    |   2%  |                                                                              |===                                                                   |   5%  |                                                                              |=====                                                                 |   7%  |                                                                              |=======                                                               |   9%  |                                                                              |========                                                              |  12%  |                                                                              |==========                                                            |  14%  |                                                                              |===========                                                           |  16%  |                                                                              |=============                                                         |  19%  |                                                                              |===============                                                       |  21%  |                                                                              |================                                                      |  23%  |                                                                              |==================                                                    |  26%  |                                                                              |====================                                                  |  28%  |                                                                              |=====================                                                 |  30%  |                                                                              |=======================                                               |  33%  |                                                                              |========================                                              |  35%  |                                                                              |==========================                                            |  37%  |                                                                              |============================                                          |  40%  |                                                                              |=============================                                         |  42%  |                                                                              |===============================                                       |  44%  |                                                                              |=================================                                     |  47%  |                                                                              |==================================                                    |  49%  |                                                                              |====================================                                  |  51%  |                                                                              |=====================================                                 |  53%  |                                                                              |=======================================                               |  56%  |                                                                              |=========================================                             |  58%  |                                                                              |==========================================                            |  60%  |                                                                              |============================================                          |  63%  |                                                                              |==============================================                        |  65%  |                                                                              |===============================================                       |  67%  |                                                                              |=================================================                     |  70%  |                                                                              |==================================================                    |  72%  |                                                                              |====================================================                  |  74%  |                                                                              |======================================================                |  77%  |                                                                              |=======================================================               |  79%  |                                                                              |=========================================================             |  81%  |                                                                              |===========================================================           |  84%  |                                                                              |============================================================          |  86%  |                                                                              |==============================================================        |  88%  |                                                                              |===============================================================       |  91%  |                                                                              |=================================================================     |  93%  |                                                                              |===================================================================   |  95%  |                                                                              |====================================================================  |  98%  |                                                                              |======================================================================| 100%
## Comapring with background model and estimating p-values..
## Done !
plotOncodrive(res = maf.sig, fdrCutOff = 0.1, useFraction = TRUE, labelSize = 0.5)

Expandir por familia em vez de samples:

#por familia:
expanded_f_anno_df <- filtered_anno_df %>%
  separate_rows(Family, sep = ",") %>% 
  mutate(Family = trimws(Family))
nrow(expanded_f_anno_df)
## [1] 773
# Contar quantas variantes cada família tem em anno_df_final
family_counts <- expanded_f_anno_df %>%
  group_by(Family) %>%
  summarise(n_variants = n(), .groups="drop") %>%
  arrange(desc(n_variants))

ggplot(family_counts, aes(x = reorder(Family, n_variants), y = n_variants, fill = n_variants)) +
  geom_col(width = 0.5, show.legend = FALSE) +  
  geom_text(aes(label = n_variants), hjust = -0.8, size = 2.5) +
  scale_fill_viridis_c(option = "C", direction = -1) +
  labs(
    title = "Familias por numero de Mutacoes",
    x     = "Familia",
    y     = "Numero de Variantes"
  ) +
  coord_flip() +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.y = element_text(size = 7),
    plot.title  = element_text(face = "bold", hjust = 0.5)
  )

#Para cada (Hugo_Symbol, Family), mantem apenas uma linha (variants diferentes no mesmo gene-fam contam uma vez)
gene_family_distinct <- expanded_f_anno_df %>%
  distinct(Hugo_Symbol, Family)

# Conta quantas famílias diferentes cada gene tem
gene_family_counts <- gene_family_distinct %>%
  group_by(Hugo_Symbol) %>%
  summarise(
    n_families = n(),    # número de famílias em que esse gene está mutado
    .groups = "drop"
  ) %>%
  arrange(desc(n_families))

#  Veja as top 10
top10_genes_families <- gene_family_counts %>% slice_head(n = 11)
# filtrar o unknown
top10_genes_families <- top10_genes_families %>% filter(Hugo_Symbol!='Unknown')

# plote um barplot das top 10
ggplot(top10_genes_families, aes(x = reorder(Hugo_Symbol, n_families), y = n_families, fill = n_families)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n_families), vjust = -0.3, size = 3.5) +
  scale_fill_viridis_c(option = "C", direction = -1) +
  labs(
    title = "Top 10 Genes Most Frequently Mutated in Families",
    x     = "Gene",
    y     = "Number of Families"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title  = element_text(face = "bold", hjust = 0.5)
  )

top30_genes <- gene_family_counts %>%
  slice_head(n = 31) %>%
  pull(Hugo_Symbol)
# Aqui, usamos cleaned_anno_df_family (que já explode cada variante por família)
# e contamos quantas mutações (linhas) cada (gene, família) tem, mas como variantes diferentes no
# mesmo (gene, família) queremos contar todas
gene_family_counts_detailed <- expanded_f_anno_df %>%
  filter(Hugo_Symbol %in% top30_genes) %>%       # manter só os Top 30 genes
  group_by(Hugo_Symbol, Family) %>%
  summarise(mutation_count = n(), .groups = "drop")

gene_family_counts_detailed<- gene_family_counts_detailed %>% filter(Hugo_Symbol!='Unknown')

#  Transformar em matriz wide-format: colunas = famílias, linhas = genes, valores = mutation_count
gene_family_matrix <- gene_family_counts_detailed %>%
  pivot_wider(names_from = Family,
              values_from = mutation_count,
              values_fill = 0)

# Ajustar a estrutura para pheatmap
gene_family_mat <- as.data.frame(gene_family_matrix)
rownames(gene_family_mat) <- gene_family_mat$Hugo_Symbol
gene_family_mat$Hugo_Symbol <- NULL
gene_family_mat <- as.matrix(gene_family_mat)

#  plotar o heatmap dos Top 30 genes vs. famílias
pheatmap(
  gene_family_mat,
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  color = colorRampPalette(c("white", "orange", "red"))(100),
  main = "Heatmap: Numero de Mutacoes (Top 30 Genes) por Familia",
  fontsize_row = 6,
  fontsize_col = 7
)

pws = pathways(maf = maf_object, plotType = 'treemap')
## Summarizing signalling pathways [Sanchez-Vega et al., https://doi.org/10.1016/j.cell.2018.03.035]

plotPathways(maf = maf_object, pathlist = pws)

Graficos maftools por familia:

expanded_f_anno_df_2 <- filtered_anno_df %>%
separate_rows(Family, sep = ",") %>%
mutate(
Family = trimws(Family),
Tumor_Sample_Barcode = Family # usa a família como identificador principal
)

maf_fam_object <- read.maf(
maf      = expanded_f_anno_df_2,
isTCGA   = FALSE
)
## -Validating
## -Summarizing
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.070s elapsed (0.050s cpu)
fam_metadata <- data.frame(
Tumor_Sample_Barcode = unique(expanded_f_anno_df$Family),
grupo = "grupoX", # ou outras variáveis por família
stringsAsFactors = FALSE
)

maf_fam_object@clinical.data <- merge(
getClinicalData(maf_fam_object),
fam_metadata,
by = "Tumor_Sample_Barcode",
all.x = TRUE,
sort = FALSE
)
oncoplot(
  maf              = maf_fam_object,
  top              = 20,                  # top 20 genes mais mutados
  sortByAnnotation = TRUE,                # ordena pelas colunas clínicas, se houver
  showTumorSampleBarcodes = TRUE,         # mostra nomes das "amostras" (aqui: famílias)
  draw_titv        = FALSE,               # tira gráfico de Ti/Tv no topo
  fontSize         = 0.8,
  removeNonMutated = TRUE               
)

plotmafSummary(maf = maf_fam_object, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

mafbarplot(maf_fam_object)

maf.titv = titv(maf = maf_fam_object, plot = FALSE, useSyn = TRUE)
#plot titv summary
plotTiTv(res = maf.titv)

lollipopPlot(maf = maf_fam_object,
             gene = 'PABPC3',
             AACol='aaChange',
             showMutationRate = TRUE)

lollipopPlot(maf = maf_fam_object,
             gene = 'ASPN',
             AACol='aaChange',
             showMutationRate = TRUE)
## 2 transcripts available. Use arguments refSeqID or proteinID to manually specify tx name.
##      HGNC    refseq.ID   protein.ID aa.length
##    <char>       <char>       <char>     <num>
## 1:   ASPN NM_001193335 NP_001180264       242
## 2:   ASPN    NM_017680    NP_060150       379
## Using longer transcript NM_017680 for now.

library(clusterProfiler)
## Warning: package 'clusterProfiler' was built under R version 4.4.2
## 
## clusterProfiler v4.14.6 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
## 
## Please cite:
## 
## S Xu, E Hu, Y Cai, Z Xie, X Luo, L Zhan, W Tang, Q Wang, B Liu, R Wang,
## W Xie, T Wu, L Xie, G Yu. Using clusterProfiler to characterize
## multiomics data. Nature Protocols. 2024, 19(11):3292-3320
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:stats':
## 
##     filter
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.4.2
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
## 
##     rename
## The following objects are masked from 'package:data.table':
## 
##     first, second
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
## 
##     slice
## The following object is masked from 'package:data.table':
## 
##     shift
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:grDevices':
## 
##     windows
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## 
# Lista original
gene_list <- gene_family_counts$Hugo_Symbol[1:20]


# Mapeamento de símbolos para ENTREZID
entrez_ids <- bitr(
  gene_list,
  fromType = "SYMBOL",
  toType   = "ENTREZID",
  OrgDb    = org.Hs.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
entrez_ids_total <- bitr(
  gene_family_counts$Hugo_Symbol,
  fromType = "SYMBOL",
  toType   = "ENTREZID",
  OrgDb    = org.Hs.eg.db
)
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(gene_family_counts$Hugo_Symbol, fromType = "SYMBOL", toType =
## "ENTREZID", : 1.19% of input gene IDs are fail to map...

GO (Gene Ontology)enrichGO()

ego <- enrichGO(
  gene         = entrez_ids$ENTREZID,
  OrgDb        = org.Hs.eg.db,
  ont          = "BP",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.2,
  readable     = TRUE
)
dotplot(ego, showCategory = 20)

KEGG PathwaysenrichKEGG()

ekegg <- enrichKEGG(
  gene         = entrez_ids$ENTREZID,
  organism     = "hsa",         
  pvalueCutoff = 0.05
)
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
dotplot(ekegg, showCategory = 20)

pws = pathways(maf = maf_fam_object, plotType = 'treemap')
## Summarizing signalling pathways [Sanchez-Vega et al., https://doi.org/10.1016/j.cell.2018.03.035]

plotPathways(maf = maf_fam_object, pathlist = pws)

library(maftools)


oncodrive_res <- oncodrive(
  maf       = maf_fam_object,
  AACol     = "aaChange",  
  minMut   = 5,           
  pvalMethod = "zscore"
)
## Warning in oncodrive(maf = maf_fam_object, AACol = "aaChange", minMut = 5, :
## Oncodrive has been superseeded by OncodriveCLUSTL. See
## http://bg.upf.edu/group/projects/oncodrive-clust.php
## No syn mutations found! Skipping background estimation. Using predefined values. (Mean = 0.279; SD = 0.13)
## Estimating cluster scores from non-syn variants..
##   |                                                                              |                                                                      |   0%  |                                                                              |======                                                                |   9%  |                                                                              |=============                                                         |  18%  |                                                                              |===================                                                   |  27%  |                                                                              |=========================                                             |  36%  |                                                                              |================================                                      |  45%  |                                                                              |======================================                                |  55%  |                                                                              |=============================================                         |  64%  |                                                                              |===================================================                   |  73%  |                                                                              |=========================================================             |  82%  |                                                                              |================================================================      |  91%  |                                                                              |======================================================================| 100%
## Comapring with background model and estimating p-values..
## Done !
head(oncodrive_res$res)
## NULL
plotOncodrive(oncodrive_res, fdrCutOff = 0.05)

oncodrive_input <- expanded_f_anno_df_2 %>%
  transmute(
    CHROMOSOME = Chromosome,
    POSITION   = Start_Position,
    REF        = Reference_Allele,
    ALT        = Tumor_Seq_Allele2,
    SAMPLE     = Tumor_Sample_Barcode
  )

write.table(
  oncodrive_input,
  file = "oncodrive_input.txt",
  sep  = "\t",
  quote = FALSE,
  row.names = FALSE
)
# Carrega pacotes necessários
library(dplyr)
library(ggplot2)

# Lê o output do OncodriveFML
res <- read.delim("input-oncodrivefml.tsv", stringsAsFactors = FALSE)

# Visualiza as primeiras linhas
head(res)
##           GENE_ID MUTS MUTS_RECURRENCE SAMPLES  P_VALUE    Q_VALUE P_VALUE_NEG
## 1 ENSG00000151846   56               2      36 1.00e-07 0.00000460     1.00000
## 2 ENSG00000131951    2               1       2 1.20e-04 0.00276000     0.99998
## 3 ENSG00000148288    2               1       2 8.00e-04 0.01226667     0.99967
## 4 ENSG00000185069    2               1       2 1.27e-03 0.01460500     0.99895
## 5 ENSG00000168594    2               1       2 2.44e-03 0.02244800     0.99831
## 6 ENSG00000115828    2               1       2 3.98e-03 0.03051333     0.99623
##   Q_VALUE_NEG SNP MNP INDELS SYMBOL
## 1           1  56   0      0 PABPC3
## 2           1   2   0      0  LRRC9
## 3           1   2   0      0  GBGT1
## 4           1   2   0      0  KRT76
## 5           1   0   0      2 ADAM29
## 6           1   2   0      0   QPCT
# Selecionar genes com Q_VALUE < 0.1
sig_genes <- res %>%
  filter(Q_VALUE < 0.1) %>%
  arrange(Q_VALUE)

# Verifica o top 10
head(sig_genes[, c("SYMBOL", "MUTS", "SAMPLES", "P_VALUE", "Q_VALUE")], 10)
##    SYMBOL MUTS SAMPLES   P_VALUE    Q_VALUE
## 1  PABPC3   56      36 0.0000001 0.00000460
## 2   LRRC9    2       2 0.0001200 0.00276000
## 3   GBGT1    2       2 0.0008000 0.01226667
## 4   KRT76    2       2 0.0012700 0.01460500
## 5  ADAM29    2       2 0.0024400 0.02244800
## 6    QPCT    2       2 0.0039800 0.03051333
## 7  KCNJ12    2       2 0.0052300 0.03436857
## 8   DCDC1    2       2 0.0069300 0.03984750
## 9   ASCL3    2       2 0.0136000 0.06951111
## 10  ABCG5    2       2 0.0201500 0.09269000
# Exporta a tabela filtrada para usar em relatórios
write.table(
  sig_genes,
  file      = "OncodriveFML_sig_genes_q0.1.tsv",
  sep       = "\t",
  quote     = FALSE,
  row.names = FALSE
)

# Adiciona coluna de –log10(p‑value)
sig_genes <- sig_genes %>%
  mutate(log10P = -log10(P_VALUE))

# Gráfico de barras horizontais: genes por –log10(p‑value)
ggplot(sig_genes, aes(
    x = reorder(SYMBOL, log10P),
    y = log10P,
    fill = log10P
  )) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Gene",
    y = "-log10(p_value)",
    title = "Genes com FM_bias significativo (q < 0.1)",
    subtitle = paste0("Total: ", nrow(sig_genes), " genes")
  ) +
  scale_fill_viridis_c(option = "C") +
  theme_minimal()